*	************************************************************************
*	Authors: 		Eugenie Dugoua and Johannes Urpelainen
*	Purpose:   		Stata do file to replicate the findings in: Dugoua, Eugenie and Liu, Ruinan and Urpelainen, Johannes, Geographic and Socio-Economic Barriers to Rural Electrification: New Evidence from Indian Villages (March 22, 2017). Energy Policy, Forthcoming.
*	Data Used:  	Village_master.dta and ALL_clean_Module.dta
*	Output:			None
*	Software:		Stata SE 12.1
*	Machine:		Linux Mint 17
*	************************************************************************

*	************************************************************************
*	DIRECTORY
*	************************************************************************
* 	Please place .do file and data in the same directory
* 	Change the name of this folder in the following line
*
* 	Pin the interpreter to the release named in the file header so that
* 	later Stata releases run the commands the same way.
version 12.1

* 	Both cd attempts are deliberately best-effort (capture): the first is
* 	the authors' own path, the second is the placeholder to be edited. If
* 	neither succeeds the working directory is left unchanged.
capture cd "/home/emd/Dropbox/India Energy Access (Eugenie, Ruinan, Johannes)/Replication_package/"
capture cd "REPLACE_WITH_NAME_OF_YOUR_DIRECTORY"

capture log close
clear all
set more off

* 	Because the cd failures above are swallowed, check explicitly that the
* 	data file is reachable and stop with a clear message if it is not.
capture confirm file "Village_master.dta"
if _rc {
	display as error "Village_master.dta not found in `c(pwd)'. Edit the cd line at the top of this do-file."
	exit 601
}

use Village_master
sort village_code
sort village_code

*************************************************************************************************************************
*************************************************************************************************************************
************** 1. Results for percentage of households using electricity  
*************************************************************************************************************************
*************************************************************************************************************************

*	************************************************************************
*	SUMMARY STATISTICS
*	************************************************************************
* Two descriptive tables, each produced by one summarize call. The variable
* lists are held in local macros so that each table reads as a single line.
local sumvars_1 lighting_electricity distance_min log_dist totalgeo log_geo ///
	tot_p p_sc p_st p_other log_sc log_st log_other

local sumvars_2 VillageGridElec_villquest VillageGridElec_hhquest ///
	Av_month_exp log_exp Nbr_households ///
	Nbr_hh_SC_survey Nbr_hh_ST_survey Nbr_hh_OBC_survey Nbr_hh_general_survey ///
	log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
	log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey ///
	Herfindahl_hab gini_expenditure

summarize `sumvars_1'
summarize `sumvars_2'


*	************************************************************************
*	CORRELATION MATRIX FOR GEOGRAPHIC VARIABLES
*	************************************************************************
* Attach short labels to the covariates first, then to the three
* electrification measures, before computing the correlation matrix.
label var log_dist "Distance"
label var log_exp "Expenditure"
label var log_geo "Surface"

label var lighting_electricity "ER c"
label var VillageGridElec_villquest "ER v"
label var VillageGridElec_hhquest "ER h"

local er_measures lighting_electricity VillageGridElec_villquest VillageGridElec_hhquest
correlate `er_measures' log_dist log_exp log_geo


*	************************************************************************
*	CORRELATION MATRIX FOR CASTES VARIABLES
*	************************************************************************
* Labels for the three electrification measures (re-applied here so this
* section can be run on its own).
label var lighting_electricity "ER c"
label var VillageGridElec_villquest "ER v"
label var VillageGridElec_hhquest "ER h"

* Labels for the caste-composition variables: "c" suffix for the log_sc /
* log_st / log_other set, "s" suffix for the *_survey set.
label var log_sc "SC c"
label var log_st "ST c"
label var log_other "O c"
label var log_Nbr_hh_SC_survey "SC s"
label var log_Nbr_hh_ST_survey "ST s"
label var log_Nbr_hh_OBC_survey "OBC s"
label var log_Nbr_hh_general_survey "O s"

local er_measures lighting_electricity VillageGridElec_villquest VillageGridElec_hhquest
local caste_c log_sc log_st log_other
local caste_s log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
	log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey

correlate `er_measures' `caste_c' `caste_s'


*	************************************************************************
*	HISTOGRAMS (FIGURE 3)
*	************************************************************************
* One frequency histogram per electrification measure, all drawn with the
* same binning (bins of width 5 starting at -2.5) and the same y-axis
* (0 to 150 in steps of 50), so the three panels share one specification.
foreach er_measure of varlist VillageGridElec_villquest VillageGridElec_hhquest lighting_electricity {
	histogram `er_measure', frequency start(-2.5) width(5) xtitle("") ///
		color(gs10) lcolor(gs0) ///
		plotregion(style(none)) graphregion(style(none) color(white)) ///
		yscale(r(0 150)) ylabel(0(50)150)
}


*	************************************************************************
*	MAIN RESULTS TABLE 1
*	************************************************************************
* Six fixed-effects regressions: each of the three electrification measures
* is regressed on a caste block plus log_exp, log_dist and log_geo, once
* without and once with Herfindahl_hab and gini_expenditure. The panel
* variable is the district, and standard errors are clustered on it as well.
xtset m1_q9_district_code

local caste_c   log_sc log_st log_other
local caste_s   log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
	log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey
local controls  log_exp log_dist log_geo
local diversity Herfindahl_hab gini_expenditure
local fe_opts   fe vce(cluster m1_q9_district_code)

* Columns using lighting_electricity: after each fit, test equality of the
* log_sc and log_other coefficients.
xtreg lighting_electricity `caste_c' `controls', `fe_opts'
test _b[log_sc] = _b[log_other]

xtreg lighting_electricity `caste_c' `controls' `diversity', `fe_opts'
test _b[log_sc] = _b[log_other]

* Columns using the two *quest measures: after each fit, test each of the
* SC / ST / OBC coefficients against the "general" coefficient.
foreach depvar of varlist VillageGridElec_villquest VillageGridElec_hhquest {

	xtreg `depvar' `caste_s' `controls', `fe_opts'
	foreach grp in SC ST OBC {
		test _b[log_Nbr_hh_`grp'_survey] = _b[log_Nbr_hh_general_survey]
	}

	xtreg `depvar' `caste_s' `controls' `diversity', `fe_opts'
	foreach grp in SC ST OBC {
		test _b[log_Nbr_hh_`grp'_survey] = _b[log_Nbr_hh_general_survey]
	}
}


*************************************************************************************************************************
*************************************************************************************************************************
************** 2. RGGVY Policy Analysis
*************************************************************************************************************************
*************************************************************************************************************************
* CEM matching package
* Install cem from SSC only when it is not already on the ado-path. An
* unconditional install needs network access on every run and can stop the
* do-file when a different copy of the package is already installed.
capture which cem
if _rc {
	ssc install cem
}

* Inspect the two RGGVY variables separately and then cross-tabulated.
tab v_q32_1_rggvy_start
tab v_q32_rggvy
tab v_q32_1_rggvy_start v_q32_rggvy

* REPLACE RGGVY PRE-CENSUS
* Start from the raw RGGVY indicator and reset it to 0 for villages whose
* start category is above 3.
* NOTE(review): Stata treats missing values as larger than any number, so
* the condition below is also true when v_q32_1_rggvy_start is missing and
* those villages are set to rggvy = 0 as well. Confirm this is intended; if
* not, add "& !missing(v_q32_1_rggvy_start)".
gen rggvy = v_q32_rggvy
replace rggvy = 0 if v_q32_1_rggvy_start > 3

* State fixed effect variable S1-S6
tab m1_q8_state, gen(S)

* CHANGE ANALYSIS, WITH RGGVY -- ONLY STATE FE OR NOTHING BECAUSE OF EMPTY DISTRICTS
* Change = later measure (household / village questionnaire) minus
* lighting_electricity.
gen change_hh = VillageGridElec_hhquest - lighting_electricity
gen change_vill = VillageGridElec_villquest - lighting_electricity
sum VillageGridElec_villquest, detail
* Mean = 60.21 / Median = 66.67
sum VillageGridElec_hhquest, detail
* Mean = 65.87 / Median = 75
sum lighting_electricity, detail
* Mean = 33.43 / Median = 26.35

* Convert lighting_electricity into a binary variable.
* The indicator is 1 above the cutoff and 0 at or below it. The "if
* !missing()" qualifier keeps it missing when lighting_electricity is
* missing; without it such villages would be coded 1, because a missing
* value never satisfies "<= cutoff".
* With Mean
gen lighting_elec_dummy = (lighting_electricity > 33.44) if !missing(lighting_electricity)
* With Median
gen lighting_elec_robust = (lighting_electricity > 26.35) if !missing(lighting_electricity)

label variable rggvy "RGGVY"
label variable lighting_electricity "Electrification rate (0-100)"
label variable lighting_elec_dummy "Electrification rate (binary)"
label variable lighting_elec_robust "Electrification rate (binary, median)"
label variable change_hh "Change of elec rate (household survey)"
label variable change_vill "Change of elec rate (village survey)"

*	*************************************************************************************
* 	TABLE 4
*	RGGVY targeting: regressions and matching models with rggvy as the dependent variable
*	*************************************************************************************
summarize rggvy
xtset m1_q8_state_code

local base_rhs  lighting_electricity log_dist log_geo
local socio_rhs log_exp log_sc log_st log_other
local diversity Herfindahl_hab gini_expenditure
local cl_opt    vce(cluster m1_q9_district_code)

* Regression columns: the same three specifications are fitted first by OLS
* (reg) and then by logit, each with state dummies expanded by xi and
* standard errors clustered by district.
foreach estimator in reg logit {
	xi: `estimator' rggvy `base_rhs' i.m1_q8_state, `cl_opt'
	xi: `estimator' rggvy `base_rhs' `socio_rhs' i.m1_q8_state, `cl_opt'
	xi: `estimator' rggvy `base_rhs' `socio_rhs' i.m1_q8_state `diversity', `cl_opt'
}

* Matching columns: coarsened exact matching with the mean-split indicator
* as the treatment, followed by a regression weighted by cem_weights. The
* second matching adds log_sc to the matching variables.
local state_dummies S1 S2 S3 S4 S5 S6

cem log_dist log_geo `state_dummies', treatment(lighting_elec_dummy)
regress rggvy lighting_elec_dummy [iweight = cem_weights]

cem log_dist log_geo log_sc `state_dummies', treatment(lighting_elec_dummy)
regress rggvy lighting_elec_dummy [iweight = cem_weights]

*	*************************************************************************************
*	RGGVY targeting: matching with median
*	*************************************************************************************
* Same two matching specifications as the mean-split version, but with the
* median-split indicator (lighting_elec_robust) as the treatment.
local match_spec1 log_dist log_geo S1 S2 S3 S4 S5 S6
local match_spec2 log_dist log_geo log_sc S1 S2 S3 S4 S5 S6

forvalues spec = 1/2 {
	cem `match_spec`spec'', treatment(lighting_elec_robust)
	regress rggvy lighting_elec_robust [iweight = cem_weights]
}


*****************************************
*** BALANCE TABLE
* For each of the two matching specifications, classify every village into
* one of four cells (matched or not, treated or not) and tabulate means and
* standard deviations of the covariates by cell.
gen MatchedTreated = .
label define matchedtreated 1 "Unmatched Untreated" 2 "Unmatched Treated" 3 "Matched Untreated" 4 "Matched Treated"

local match_spec1 log_dist log_geo S1 S2 S3 S4 S5 S6
local match_spec2 log_dist log_geo  log_sc S1 S2 S3 S4 S5 S6
local balance_vars lighting_electricity  distance_min log_dist totalgeo log_geo ///
	tot_p p_sc p_st p_other log_sc log_st log_other

forvalues spec = 1/2 {
	cem `match_spec`spec'', treatment(lighting_elec_dummy)

	* Cell code = 1 + treated + 2 * matched, i.e. 1..4 in the order of the
	* value label. The inlist() guards leave the cell missing whenever
	* either flag is something other than 0 or 1.
	replace MatchedTreated = .
	replace MatchedTreated = 1 + lighting_elec_dummy + 2 * cem_matched ///
		if inlist(lighting_elec_dummy, 0, 1) & inlist(cem_matched, 0, 1)
	label values MatchedTreated matchedtreated

	tabstat `balance_vars', by(MatchedTreated) statistics(mean sd ) columns(statistics) nototal
}


*	*********************************************************************************
* 	TABLE 5
*	RGGVY Effects: four regressions with rggvy being treatment +four matching models with rggvy being treatment
*	*********************************************************************************
xtset m1_q9_district_code

local outcomes change_hh change_vill
local rhs      rggvy log_other log_sc log_st log_exp log_dist log_geo
local cl_opt   vce(cluster m1_q9_district_code)

*Table column 1-4
* First both outcomes without state dummies, then both with state dummies
* expanded by xi; standard errors clustered by district throughout.
foreach outcome of local outcomes {
	regress `outcome' `rhs', `cl_opt'
}
foreach outcome of local outcomes {
	xi: regress `outcome' `rhs' i.m1_q8_state_code, `cl_opt'
}

*Table column 5-8
* Two matching specifications with rggvy as the treatment (the second adds
* log_sc); after each, both outcomes are regressed on rggvy using the
* weights from that matching.
local match_spec1 log_dist log_geo S1 S2 S3 S4 S5 S6
local match_spec2 log_dist log_geo log_sc S1 S2 S3 S4 S5 S6

forvalues spec = 1/2 {
	cem `match_spec`spec'', treatment(rggvy)
	foreach outcome of local outcomes {
		regress `outcome' rggvy [iweight = cem_weights]
	}
}


*****************************************
*** BALANCE TABLE: RGGVY EFFECTS
* For each of the two matching specifications with rggvy as the treatment,
* classify every village into one of four cells (matched or not, treated or
* not) and tabulate means and standard deviations by cell.
*
* The classification uses rggvy in BOTH passes, because rggvy is the
* treatment passed to cem in both. The second pass previously classified on
* lighting_elec_dummy, which made its "treated" rows describe a different
* variable from the one that was matched on.

* capture: allow this section to run even if the variable / value label
* from an earlier balance table is not present.
capture drop MatchedTreated
gen MatchedTreated = .
capture label drop matchedtreated
label define matchedtreated 1 "Unmatched Untreated" 2 "Unmatched Treated" 3 "Matched Untreated" 4 "Matched Treated"

local match_spec1 log_dist log_geo S1 S2 S3 S4 S5 S6
local match_spec2 log_dist log_geo log_sc S1 S2 S3 S4 S5 S6
local balance_vars change_hh change_vill distance_min log_dist totalgeo log_geo ///
	tot_p p_sc p_st p_other log_sc log_st log_other

forvalues spec = 1/2 {
	cem `match_spec`spec'', tr(rggvy)

	* Explicit == 0 / == 1 comparisons: villages whose rggvy is neither 0
	* nor 1 (e.g. missing) stay unclassified.
	replace MatchedTreated = .
	replace MatchedTreated = 1 if cem_matched == 0 & rggvy == 0
	replace MatchedTreated = 2 if cem_matched == 0 & rggvy == 1
	replace MatchedTreated = 3 if cem_matched == 1 & rggvy == 0
	replace MatchedTreated = 4 if cem_matched == 1 & rggvy == 1
	label values MatchedTreated matchedtreated

	tabstat `balance_vars', by(MatchedTreated) statistics(mean sd ) columns(statistics) nototal
}


*************************************************************************************************************************
*************************************************************************************************************************
************** 3. Results using hours of electricity instead of electrification rate
*************************************************************************************************************************
*************************************************************************************************************************
* Frequency histogram of Av_hours_elec with unit-width bins starting at -0.5
* and a y-axis from 0 to 80 in steps of 20.
histogram Av_hours_elec, frequency start(-0.5) width(1) ///
	xtitle("Average hours of electricity per day") ///
	color(gs10) lcolor(gs0) ///
	plotregion(style(none)) graphregion(style(none) color(white)) ///
	yscale(r(0 80)) ylabel(0(20)80)

* District fixed-effects regressions of Av_hours_elec, without and then
* with Herfindahl_hab and gini_expenditure; standard errors clustered by
* district.
xtset m1_q9_district_code

local survey_rhs log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
	log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo
local fe_opts fe vce(cluster m1_q9_district_code)

* NOTE(review): "test a b" is a joint test that both coefficients are zero.
* The Table 1 regressions instead test equality of the two coefficients
* (test _b[a] = _b[b]). Confirm which hypothesis is intended here.
xtreg Av_hours_elec `survey_rhs', `fe_opts'
test log_Nbr_hh_SC_survey log_Nbr_hh_general_survey

xtreg Av_hours_elec `survey_rhs' Herfindahl_hab gini_expenditure, `fe_opts'
test log_Nbr_hh_SC_survey log_Nbr_hh_general_survey


*************************************************************************************************************************
*************************************************************************************************************************
************** 4. Robustness checks: excluding one state at a time
*************************************************************************************************************************
*************************************************************************************************************************

*	************************************************************************
*	EXCLUDING ONE STATE AT A TIME
*	State codes, as given in the original section headings:
*	   9 = Uttar Pradesh    10 = Bihar      19 = West Bengal
*	  20 = Jharkhand        21 = Odisha     23 = Madhya Pradesh
*	************************************************************************
* The six Table 1 specifications are re-estimated once per state code, each
* time dropping the villages of that state. This replaces six copy-pasted
* stanzas that differed only in the state code, so a change to the
* specification now has to be made in one place only.
xtset m1_q9_district_code

local census_rhs log_sc log_st log_other log_exp log_dist log_geo
local survey_rhs log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
	log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo
local diversity  Herfindahl_hab gini_expenditure
local fe_opts    fe vce(cluster m1_q9_district_code)

foreach state of numlist 9 10 19 20 21 23 {

	* Header so each group of six regressions can be located in the log.
	display as text _newline "==> Excluding state code `state'"

	xtreg lighting_electricity `census_rhs' ///
		if m1_q8_state_code != `state', `fe_opts'
	xtreg lighting_electricity `census_rhs' `diversity' ///
		if m1_q8_state_code != `state', `fe_opts'

	foreach depvar of varlist VillageGridElec_villquest VillageGridElec_hhquest {
		xtreg `depvar' `survey_rhs' ///
			if m1_q8_state_code != `state', `fe_opts'
		xtreg `depvar' `survey_rhs' `diversity' ///
			if m1_q8_state_code != `state', `fe_opts'
	}
}

*************************************************************************************************************************
*************************************************************************************************************************
************** 5. Heterogeneity Analysis: looking at one state only at a time
*************************************************************************************************************************
*************************************************************************************************************************

*	************************************************************************
*	ONLY UTTAR PRADESH (CODE = 9)
*	************************************************************************
xtset m1_q9_district_code
xtreg lighting_electricity log_sc log_st log_other log_exp log_dist log_geo if  m1_q8_state_code == 9, ///
 fe vce(cluster m1_q9_district_code)
xtreg lighting_electricity  log_sc log_st log_other log_exp log_dist log_geo ///
Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 9, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 9, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 9, ///
fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 9, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 9, ///
 fe vce(cluster m1_q9_district_code)

*	************************************************************************
*	ONLY BIHAR (CODE = 10)
*	************************************************************************
xtset m1_q9_district_code
xtreg lighting_electricity log_sc log_st log_other log_exp log_dist log_geo if  m1_q8_state_code == 10, ///
 fe vce(cluster m1_q9_district_code)
xtreg lighting_electricity  log_sc log_st log_other log_exp log_dist log_geo ///
Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 10, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 10, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 10, ///
fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 10, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 10, ///
 fe vce(cluster m1_q9_district_code)

*	************************************************************************
*	ONLY WEST BENGHAL (CODE = 19)
*	************************************************************************
xtset m1_q9_district_code
xtreg lighting_electricity log_sc log_st log_other log_exp log_dist log_geo if  m1_q8_state_code == 19, ///
 fe vce(cluster m1_q9_district_code)
xtreg lighting_electricity  log_sc log_st log_other log_exp log_dist log_geo ///
Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 19, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 19, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 19, ///
fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 19, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 19, ///
 fe vce(cluster m1_q9_district_code)

*	************************************************************************
*	ONLY JHARKHAND (CODE = 20)
*	************************************************************************
xtset m1_q9_district_code
xtreg lighting_electricity log_sc log_st log_other log_exp log_dist log_geo if  m1_q8_state_code == 20, ///
 fe vce(cluster m1_q9_district_code)
xtreg lighting_electricity  log_sc log_st log_other log_exp log_dist log_geo ///
Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 20, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 20, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_villquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 20, ///
fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo  if  m1_q8_state_code == 20, fe vce(cluster m1_q9_district_code)
xtreg VillageGridElec_hhquest  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey ///
 log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo Herfindahl_hab gini_expenditure  if  m1_q8_state_code == 20, ///
 fe vce(cluster m1_q9_district_code)

*	************************************************************************
*	ONLY ODISHA (CODE = 21)
*	************************************************************************
* District fixed-effects regressions on the subsample with state code 21.
* Six models are estimated, in this order:
*   lighting_electricity       (base, then with the two extra regressors)
*   VillageGridElec_villquest  (base, then with the two extra regressors)
*   VillageGridElec_hhquest    (base, then with the two extra regressors)
xtset m1_q9_district_code

* Regressor lists and shared command pieces (locals are specific to this block).
local od_light_rhs log_sc log_st log_other log_exp log_dist log_geo
local od_grid_rhs  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo
local od_extra_rhs Herfindahl_hab gini_expenditure
local od_sample    m1_q8_state_code == 21
local od_opts      fe vce(cluster m1_q9_district_code)

xtreg lighting_electricity `od_light_rhs' if `od_sample', `od_opts'
xtreg lighting_electricity `od_light_rhs' `od_extra_rhs' if `od_sample', `od_opts'

foreach od_dep in VillageGridElec_villquest VillageGridElec_hhquest {
    xtreg `od_dep' `od_grid_rhs' if `od_sample', `od_opts'
    xtreg `od_dep' `od_grid_rhs' `od_extra_rhs' if `od_sample', `od_opts'
}

*	************************************************************************
*	ONLY MADHYA PRADESH (CODE = 23)
*	************************************************************************
* District fixed-effects regressions on the subsample with state code 23.
* Six models are estimated, in this order:
*   lighting_electricity       (base, then with the two extra regressors)
*   VillageGridElec_villquest  (base, then with the two extra regressors)
*   VillageGridElec_hhquest    (base, then with the two extra regressors)
xtset m1_q9_district_code

* Regressor lists and shared command pieces (locals are specific to this block).
local mp_light_rhs log_sc log_st log_other log_exp log_dist log_geo
local mp_grid_rhs  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo
local mp_extra_rhs Herfindahl_hab gini_expenditure
local mp_sample    m1_q8_state_code == 23
local mp_opts      fe vce(cluster m1_q9_district_code)

xtreg lighting_electricity `mp_light_rhs' if `mp_sample', `mp_opts'
xtreg lighting_electricity `mp_light_rhs' `mp_extra_rhs' if `mp_sample', `mp_opts'

foreach mp_dep in VillageGridElec_villquest VillageGridElec_hhquest {
    xtreg `mp_dep' `mp_grid_rhs' if `mp_sample', `mp_opts'
    xtreg `mp_dep' `mp_grid_rhs' `mp_extra_rhs' if `mp_sample', `mp_opts'
}


*************************************************************************************************************************
*************************************************************************************************************************
************** 6. Fractional Logit Analysis
*************************************************************************************************************************
*************************************************************************************************************************
*** Fractional logit needs outcomes between 0 and 1, so the three outcome
*** variables are divided by 100 in place. This modifies the data in memory:
*** running this block twice without reloading would rescale them twice.
foreach fl_share in lighting_electricity VillageGridElec_villquest VillageGridElec_hhquest {
    replace `fl_share' = `fl_share'/100
}

* NOTE(review): the file header lists Stata SE 12.1; the official
* fractional-response command is fracreg (Stata 14+). Confirm that a
* fraclogit command is installed on the machine used for replication.

* Regressor lists (locals are specific to this block). District indicators
* are expanded by the xi: prefix from i.m1_q9_district_code.
local fl_light_rhs log_sc log_st log_other log_exp log_dist log_geo
local fl_grid_rhs  log_Nbr_hh_SC_survey log_Nbr_hh_ST_survey log_Nbr_hh_OBC_survey log_Nbr_hh_general_survey log_exp log_dist log_geo
local fl_extra_rhs Herfindahl_hab gini_expenditure

*** RESULTS: ALL ROBUST
xi: fraclogit lighting_electricity `fl_light_rhs' i.m1_q9_district_code
xi: fraclogit lighting_electricity `fl_light_rhs' `fl_extra_rhs' i.m1_q9_district_code

foreach fl_dep in VillageGridElec_villquest VillageGridElec_hhquest {
    xi: fraclogit `fl_dep' `fl_grid_rhs' i.m1_q9_district_code
    xi: fraclogit `fl_dep' `fl_grid_rhs' `fl_extra_rhs' i.m1_q9_district_code
}



*************************************************************************************************************************
*************************************************************************************************************************
************** 7. Household analysis using the ACCESS survey data
*************************************************************************************************************************
*************************************************************************************************************************
* Start from an empty session and load the household-level survey file.
clear all
use ALL_clean_Module

* Use the same village identifier name as the village-level file.
rename m1_q11_village_code village_code
sort village_code

*** SURVEY DESIGN: probability weights, villages as strata
*** (adjusts for non-proportional sampling)
svyset [pweight=weight], strata(village_code) singleunit(centered)

* Villages are the panel units for the xt commands that follow.
xtset village_code

* Log of m1_q32, labelled as expenditures.
generate log_exp = ln(m1_q32)

* Inspect the raw caste coding before value labels are attached.
codebook m1_q25_caste

label variable log_exp "Expenditures (log)"
label define castelabels 1 "Scheduled Caste" 2 "Scheduled Tribe" 3 "Other Backward Caste" 4 "Other General Caste"
label values m1_q25_caste castelabels

*** ANALYSIS - village fixed effects, with i.m1_q25_caste
* For each right-hand side (expenditure only, caste only, both) estimate
*   1) a fixed-effects logit of m2_q55_grid, and
*   2) a weighted fixed-effects linear model with village-clustered errors.
foreach hh_rhs in "log_exp" "i.m1_q25_caste" "log_exp i.m1_q25_caste" {
    xtlogit m2_q55_grid `hh_rhs', fe
    xtreg m2_q55_grid `hh_rhs' [pweight=weight], fe vce(cluster village_code)
}

*** ODDS RATIOS FOR CASTE
* Same fixed-effects logits as above (caste only, then caste plus log_exp),
* reported as odds ratios through the or option.
foreach or_rhs in "i.m1_q25_caste" "log_exp i.m1_q25_caste" {
    xtlogit m2_q55_grid `or_rhs', fe or
}

*** BASIC DESCRIPTIVES BY GROUP
tab m1_q25_caste

* Survey-weighted means of each outcome within caste codes 1-4
* (1 = SC, 2 = ST, 3 = OBC, 4 = general, per the castelabels value label).
* Values noted by the authors:
*   m2_q55_grid: SC 59, ST 68, OBC 60, GEN 74
*   m2_q68_elec: SC 61, ST 69, OBC 67, GEN 77
* NOTE(review): the sample is restricted with -if-; Stata's svy documentation
* recommends subpop() for subpopulation standard errors. Confirm this is intended.
foreach desc_outcome in m2_q55_grid m2_q68_elec {
    forvalues caste_code = 1/4 {
        svy: mean `desc_outcome' if m1_q25_caste == `caste_code'
    }
}

*** GRAPH
* Horizontal bars of weighted m2_q55_grid by caste group. The axis ticks
* at .2/.4/.6/.8 are relabelled 20/40/60/80 so the scale reads as percent.
graph hbar m2_q55_grid [pweight=weight], ///
    over(m1_q25_caste) ///
    graphregion(fcolor(white)) ///
    ytitle(Households with Electricity Access (%)) ///
    ylabel(0 .2 "20" .4 "40" .6 "60" .8 "80")

*** Merging village level variables
* Step 1: store the household data in memory so it can be merged onto the
* village file below.
save household_level, replace

* Step 2: load the village file and keep only the identifier plus the
* village-level variables needed in the household regressions.
clear 
use Village_master
keep village_code gini_expenditure Av_month_exp Av_land Av_year_saving Av_year_borrow Av_debt VillageGridElec_villquest Herfindahl_elec
sort village_code

* Step 3: one village row matches many household rows.
merge 1:m village_code using household_level

* Drop the merge indicator BEFORE saving. Otherwise household_level.dta
* keeps _merge, and any later -merge- that uses this file fails because
* _merge is already defined.
drop _merge
save household_level, replace

* Step 4: re-declare the survey design and panel structure on the merged data.
svyset [pweight=weight], strata(village_code) singleunit(centered)
xtset village_code
 
*** regression with village info
* Household-level regressors shared by all models (local is specific to this block).
local vi_rhs log_exp i.m1_q25_caste

* Village fixed-effects models: logit, weighted linear, and weighted linear
* with village-clustered standard errors.
xtlogit m2_q55_grid `vi_rhs', fe
xtreg m2_q55_grid `vi_rhs' [pweight=weight], fe
xtreg m2_q55_grid `vi_rhs' [pweight=weight], fe vce(cluster village_code)

* Models adding Av_month_exp. The original commands gave no estimator option,
* which selects Stata's default random-effects estimator; it is spelled out
* here with re.
xtlogit m2_q55_grid `vi_rhs' Av_month_exp, re
xtreg m2_q55_grid `vi_rhs' Av_month_exp, re vce(cluster village_code)





